Supplementary Workflow 10 - CNN Blackbox Visualisation Tool¶

Aim¶

  • For assisting in Image Classification Workflow
  • Troubleshooting Image Classification Accuracy and Model Overfit
  • Code licensed as detailed below

Method¶

  • Gradient-weighted Class Activation Mapping (Grad-CAM) uses the gradients of any target concept, flowing into the final convolutional layer, to produce a coarse localization map that highlights the important regions in the image: https://arxiv.org/abs/1610.02391

Setup¶

In [ ]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "fundamentals"

def save_fig(fig_id, tight_layout=True, dpi=300):
    """Save the current matplotlib figure as a PNG under images/<CHAPTER_ID>/.

    Parameters
    ----------
    fig_id : str
        Base name of the output file (".png" is appended).
    tight_layout : bool, optional
        Apply plt.tight_layout() before saving (default True).
    dpi : int, optional
        Output resolution; default 300 matches the previous hard-coded value.
    """
    directory = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # Create the target directory if it does not exist yet, so savefig
    # does not fail on a fresh checkout.
    os.makedirs(directory, exist_ok=True)
    path = os.path.join(directory, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=dpi)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

Method 1 - Conv Net layers - Feature Map Extraction (VGG)¶

In [34]:
# Load the pretrained VGG16 model and summarize the feature-map size of each
# convolutional layer (the spatial size halves after every pooling block).
from tensorflow.keras.applications.vgg16 import VGG16
from matplotlib import pyplot
# load the model (downloads the ImageNet weights on first use)
model = VGG16()
# summarize feature map shapes; keep each layer's index so specific layers
# can be addressed by position later on
for i, layer in enumerate(model.layers):
    # only convolutional layers produce the feature maps of interest
    if 'conv' not in layer.name:
        continue
    print(i, layer.name, layer.output.shape)
1 block1_conv1 (?, 224, 224, 64)
2 block1_conv2 (?, 224, 224, 64)
4 block2_conv1 (?, 112, 112, 128)
5 block2_conv2 (?, 112, 112, 128)
7 block3_conv1 (?, 56, 56, 256)
8 block3_conv2 (?, 56, 56, 256)
9 block3_conv3 (?, 56, 56, 256)
11 block4_conv1 (?, 28, 28, 512)
12 block4_conv2 (?, 28, 28, 512)
13 block4_conv3 (?, 28, 28, 512)
15 block5_conv1 (?, 14, 14, 512)
16 block5_conv2 (?, 14, 14, 512)
17 block5_conv3 (?, 14, 14, 512)
In [32]:
# Visualise the feature maps produced at the end of each VGG16 block.
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import Model
from matplotlib import pyplot
from numpy import expand_dims

# Build a multi-output model: one output per block-final conv layer.
model = VGG16()
ixs = [2, 5, 9, 13, 17]
outputs = [model.layers[i].output for i in ixs]
model = Model(inputs=model.inputs, outputs=outputs)

# Load the sample image and preprocess it exactly as VGG expects.
img = load_img('ldd_image.jpg', target_size=(224, 224))
img = img_to_array(img)
img = expand_dims(img, axis=0)   # single-image "batch"
img = preprocess_input(img)

# One forward pass yields the feature maps for every tapped layer.
feature_maps = model.predict(img)

# Plot the first 64 channels of each tapped layer on an 8x8 grid.
# Figures are labelled 2..6, matching the original counter behaviour.
square = 8
for ctr, fmap in enumerate(feature_maps, start=2):
    print(ctr)
    fig = pyplot.figure(figsize=(18, 16), dpi=80, facecolor='w', edgecolor='k')
    for ix in range(1, square * square + 1):
        # one subplot per channel, axes hidden
        ax = pyplot.subplot(square, square, ix)
        ax.set_xticks([])
        ax.set_yticks([])
        # channel ix-1 of the first (only) sample, in grayscale
        pyplot.imshow(fmap[0, :, :, ix - 1], cmap='gray')
    # persist then show the figure
    pyplot.savefig(str(ctr) + "conv_net.png", dpi=80)
    pyplot.show()
2
3
4
5
6

Method 2 - Grad Cam Heat Maps (VGG)¶

Python Scripting by Jacob Gil: https://github.com/jacobgil/keras-grad-cam

Usage: python grad-cam.py <path_to_image>

In [ ]:
from keras.applications.mobilenet_v2 import (
    MobileNetV2, preprocess_input, decode_predictions)

from keras.preprocessing import image
from keras.layers.core import Lambda
from keras.models import Sequential
from tensorflow.python.framework import ops
import keras.backend as K
import tensorflow as tf
import numpy as np
import keras
import sys
import cv2
from keras.models import Model

def _compute_gradients(tensor, var_list):
    """Gradients of `tensor` w.r.t. each variable; None gradients become zeros."""
    grads = tf.gradients(tensor, var_list)
    safe_grads = []
    for var, grad in zip(var_list, grads):
        # disconnected variables get a zero tensor of matching shape
        safe_grads.append(grad if grad is not None else tf.zeros_like(var))
    return safe_grads


def target_category_loss(x, category_index, nb_classes):
    """Mask `x` so only the score of `category_index` survives."""
    category_mask = K.one_hot([category_index], nb_classes)
    return tf.multiply(x, category_mask)

def target_category_loss_output_shape(input_shape):
    """Shape function for the masking Lambda: the mask keeps the shape intact."""
    return input_shape

def normalize(x):
    """Scale a tensor to roughly unit RMS; the epsilon guards against /0."""
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + 1e-5)

def load_image(path):
    """Load an image file as a preprocessed (1, 224, 224, 3) batch for MobileNetV2.

    Bug fix: the original ignored its `path` argument and re-read
    sys.argv[1]; the caller already passes sys.argv[1], so using the
    parameter is behavior-compatible and makes the function reusable.
    """
    img = image.load_img(path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)   # add the batch axis
    x = preprocess_input(x)         # model-specific pixel scaling
    return x

def register_gradient():
    # Register the "GuidedBackProp" gradient op once per process.
    # NOTE(review): relies on TensorFlow's private gradient registry
    # (`ops._gradient_registry`) — TF 1.x graph-mode internals.
    if "GuidedBackProp" not in ops._gradient_registry._registry:
        @ops.RegisterGradient("GuidedBackProp")
        def _GuidedBackProp(op, grad):
            # Guided backprop: pass a gradient through only where BOTH the
            # incoming gradient and the forward-pass input are positive.
            dtype = op.inputs[0].dtype
            return grad * tf.cast(grad > 0., dtype) * \
                tf.cast(op.inputs[0] > 0., dtype)

def compile_saliency_function(model, activation_layer='Conv_1'):
    """Build a K.function mapping (input image, learning phase) -> saliency map:
    the gradient of the strongest per-position channel activation of
    `activation_layer` with respect to the input image."""
    input_img = model.input
    # name -> layer lookup, skipping the input layer itself
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    layer_output = layer_dict[activation_layer].output
    # max over the channel axis: strongest activation at each spatial position
    max_output = K.max(layer_output, axis=3)
    saliency = K.gradients(K.sum(max_output), input_img)[0]
    # second input is the Keras learning-phase flag (pass 0 for inference)
    return K.function([input_img, K.learning_phase()], [saliency])

def modify_backprop(model, name):
    """Return a fresh MobileNetV2 whose ReLU gradient is overridden by `name`.

    Rebuilds the model inside a gradient_override_map context so every ReLU
    op created during construction picks up the custom registered gradient
    (TF 1.x graph mode).
    """
    g = tf.get_default_graph()
    with g.gradient_override_map({'Relu': name}):

        # get layers that have an activation
        layer_dict = [layer for layer in model.layers[1:]
                      if hasattr(layer, 'activation')]

        # replace keras relu activations with the raw tf op so the
        # gradient override applies to them
        for layer in layer_dict:
            if layer.activation == keras.activations.relu:
                layer.activation = tf.nn.relu

        # re-instantiate a new model so its graph is built under the override
        new_model = MobileNetV2(weights='imagenet')
    return new_model

def deprocess_image(x):
    '''
    Convert a gradient/activation array into a displayable uint8 RGB image.

    Same normalization as in:
    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py
    '''
    # drop a leading batch axis if present
    if np.ndim(x) > 3:
        x = np.squeeze(x)
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255

    # Bug fix: image_data_format() returns 'channels_first'/'channels_last',
    # never the legacy 'th' string, so the transpose branch could never run.
    if keras.backend.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

def grad_cam(input_model, image, category_index, layer_name):
    """Compute a Grad-CAM heatmap for `category_index` over `layer_name`.

    Returns (overlay, heatmap): the JET-colored heatmap blended onto the
    input image as uint8, and the raw heatmap scaled to [0, 1].
    """
    nb_classes = 1000
    target_layer = lambda x: target_category_loss(x, category_index, nb_classes)
    x = Lambda(target_layer, output_shape = target_category_loss_output_shape)(input_model.output)
    model = Model(inputs=input_model.input, outputs=x)
    model.summary()
    loss = K.sum(model.output)
    # Bug fix: the original compared layer names with `is` (object identity),
    # which only works by accident of string interning; use `==` instead.
    conv_output = [l for l in model.layers if l.name == layer_name][0].output
    grads = normalize(_compute_gradients(loss, [conv_output])[0])
    gradient_function = K.function([model.input], [conv_output, grads])

    output, grads_val = gradient_function([image])
    output, grads_val = output[0, :], grads_val[0, :, :, :]

    # Grad-CAM channel weights: global-average-pool the gradients
    weights = np.mean(grads_val, axis = (0, 1))
    cam = np.ones(output.shape[0 : 2], dtype = np.float32)

    for i, w in enumerate(weights):
        cam += w * output[:, :, i]

    # upsample to input size, keep positive evidence only, scale to [0, 1]
    cam = cv2.resize(cam, (224, 224))
    cam = np.maximum(cam, 0)
    heatmap = cam / np.max(cam)

    #Return to BGR [0..255] from the preprocessed image
    image = image[0, :]
    image -= np.min(image)
    image = np.minimum(image, 255)

    cam = cv2.applyColorMap(np.uint8(255*heatmap), cv2.COLORMAP_JET)
    cam = np.float32(cam) + np.float32(image)
    cam = 255 * cam / np.max(cam)
    return np.uint8(cam), heatmap

# Script entry point: run Grad-CAM and Guided Grad-CAM on the image given on
# the command line (load_image reads sys.argv[1]).
preprocessed_input = load_image(sys.argv[1])

model = MobileNetV2(weights='imagenet')

# Report the top-1 ImageNet prediction for the input image.
predictions = model.predict(preprocessed_input)
top_1 = decode_predictions(predictions)[0][0]
print('Predicted class:')
print('%s (%s) with probability %.2f' % (top_1[1], top_1[0], top_1[2]))

# Grad-CAM heatmap for the predicted class over MobileNetV2's last conv layer.
predicted_class = np.argmax(predictions)
cam, heatmap = grad_cam(model, preprocessed_input, predicted_class, "Conv_1")
cv2.imwrite("gradcam.jpg", cam)

# Guided Grad-CAM: multiply guided-backprop saliency by the Grad-CAM heatmap.
register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
saliency_fn = compile_saliency_function(guided_model)
saliency = saliency_fn([preprocessed_input, 0])
gradcam = saliency[0] * heatmap[..., np.newaxis]
cv2.imwrite("guided_gradcam.jpg", deprocess_image(gradcam))

MobileNet v2

  • Predicted Label: House
  • True Label: Office
  • See Dissertation Write Up for Comprehensive Examples of Grad Cam applied to London Buildings Dataset

Method 3 - ELI5 (GradCam applied to MobileNet Weights)¶

ELI5 is a Python package which helps to debug machine learning classifiers and explain their predictions.

  • Copyright 2019 The ELI5 Authors.

Use ELI5, Keras and TF v1.14 Environment¶

cd ml3 && activate env3

In [2]:
from PIL import Image
from IPython.display import display
import numpy as np

# you may want to keep logging enabled when doing your own work
import logging
import tensorflow as tf
#tf.get_logger().setLevel(logging.ERROR) 
import warnings
warnings.simplefilter("ignore") 
import keras
from keras.applications import mobilenet_v2

import eli5
Using TensorFlow backend.
In [3]:
# Instantiate MobileNetV2 with ImageNet weights and its classification head.
model = mobilenet_v2.MobileNetV2(include_top=True, weights='imagenet', classes=1000)

# check the input format; the first axis is the (unknown) batch size
print(model.input_shape)
dims = model.input_shape[1:3] # -> (height, width)
print(dims)
(None, 224, 224, 3)
(224, 224)
In [5]:
# Sample image under investigation — presumably a misclassified photo whose
# true label is 'office' (inferred from the filename; confirm against data).
image_uri = 'mis_flat_true_office.jpg'
#image_uri = 'mis_house_true_office.jpg'

# check the image with Pillow before any preprocessing
im = Image.open(image_uri)
print(type(im))
display(im)
<class 'PIL.JpegImagePlugin.JpegImageFile'>
In [8]:
# we could resize the image manually
# but instead let's use a utility function from `keras.preprocessing`
# we pass the required dimensions as a (height, width) tuple
im = keras.preprocessing.image.load_img(image_uri, target_size=dims) # -> PIL image
print(im)
display(im)
<PIL.Image.Image image mode=RGB size=224x224 at 0x1F2C2CE94C8>
In [9]:
# we use a routine from `keras.preprocessing` for that as well
# we get a 'doc', an object almost ready to be inputted into the model

doc = keras.preprocessing.image.img_to_array(im) # -> numpy array
print(type(doc), doc.shape)
<class 'numpy.ndarray'> (224, 224, 3)
In [10]:
#get the batch size

#numpy routine to create an axis in the first position
doc = np.expand_dims(doc, axis=0)
print(type(doc), doc.shape)
<class 'numpy.ndarray'> (1, 224, 224, 3)
In [11]:
# `keras.applications` models come with their own input preprocessing function
# for best results, apply that as well

# mobilenetv2-specific preprocessing
# (this operation is in-place)
mobilenet_v2.preprocess_input(doc)
print(type(doc), doc.shape)
<class 'numpy.ndarray'> (1, 224, 224, 3)
In [12]:
# take back the first image from our 'batch'
image = keras.preprocessing.image.array_to_img(doc[0])
print(image)
display(image)
<PIL.Image.Image image mode=RGB size=224x224 at 0x1F2C2CE60C8>
In [13]:
# make a prediction about our sample image
predictions = model.predict(doc)
print(type(predictions), predictions.shape)
WARNING: Logging before flag parsing goes to stderr.
W0930 15:19:48.609761 11112 deprecation_wrapper.py:119] From C:\Users\Admin\Anaconda3\envs\env5\lib\site-packages\keras\backend\tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

<class 'numpy.ndarray'> (1, 1000)
In [14]:
# check the top 5 indices
# `keras.applications` contains a function for that

top = mobilenet_v2.decode_predictions(predictions)
# argsort ascending, reverse for descending scores, keep the top five
top_indices = np.argsort(predictions)[0, ::-1][:5]

print(top)
print(top_indices)
[[('n03770679', 'minivan', 0.37663382), ('n03796401', 'moving_van', 0.14030492), ('n03345487', 'fire_engine', 0.11942945), ('n03769881', 'minibus', 0.09950304), ('n02701002', 'ambulance', 0.034322377)]]
[656 675 555 654 407]
In [15]:
# we need to pass the network
# the input as a numpy array
eli5.show_prediction(model, doc)
Out[15]:
In [16]:
eli5.show_prediction(model, doc, image=image)
Out[16]:
In [35]:
#make the model classify other objects and check where the classifier looks to find those objects
#cat_idx = 282 # ImageNet ID for "tiger_cat" class, because we have a cat in the picture
#eli5.show_prediction(model, doc, targets=[cat_idx]) # pass the class id
In [36]:
#window_idx = 904 # 'window screen'
#turtle_idx = 35 # 'mud turtle', some nonsense
#display(eli5.show_prediction(model, doc, targets=[window_idx]))
#display(eli5.show_prediction(model, doc, targets=[turtle_idx]))
In [37]:
# Under the hood Grad-CAM takes a hidden layer inside the network and
# differentiates it with respect to the output scores. We have the ability
# to choose which hidden layer we do our computations on.
# We could use model.summary() here, but the model has over 100 layers,
# so we will only look at the first few and last few layers.

# slices of the layer list: first 5 and last 8 layers
head = model.layers[:5]
tail = model.layers[-8:]

def pretty_print_layers(layers):
    """Print one summary row (name, type, output shape, param count) per layer."""
    for layer in layers:
        row = [layer.name, type(layer).__name__, layer.output_shape, layer.count_params()]
        pretty_print(row)

def pretty_print(lst):
    """Print the items of `lst` separated by a comma and a tab."""
    print(',\t'.join(str(item) for item in lst))

# Print a header row, the model input tensor, the first few layers,
# an ellipsis, then the last few layers.
pretty_print(['name', 'type', 'output shape', 'param. no'])
print('-'*100)
pretty_print([model.input.name, type(model.input), model.input_shape, 0])
pretty_print_layers(head)
print()
print('...')
print()
pretty_print_layers(tail)
name,	type,	output shape,	param. no
----------------------------------------------------------------------------------------------------
input_6:0,	<class 'tensorflow.python.framework.ops.Tensor'>,	(None, 224, 224, 3),	0
input_6,	InputLayer,	[(None, 224, 224, 3)],	0
block1_conv1,	Conv2D,	(None, 224, 224, 64),	1792
block1_conv2,	Conv2D,	(None, 224, 224, 64),	36928
block1_pool,	MaxPooling2D,	(None, 112, 112, 64),	0
block2_conv1,	Conv2D,	(None, 112, 112, 128),	73856

...

block5_conv1,	Conv2D,	(None, 14, 14, 512),	2359808
block5_conv2,	Conv2D,	(None, 14, 14, 512),	2359808
block5_conv3,	Conv2D,	(None, 14, 14, 512),	2359808
block5_pool,	MaxPooling2D,	(None, 7, 7, 512),	0
flatten,	Flatten,	(None, 25088),	0
fc1,	Dense,	(None, 4096),	102764544
fc2,	Dense,	(None, 4096),	16781312
predictions,	Dense,	(None, 1000),	4097000
In [20]:
# pick a few convolutional layers that are 'far apart' and do Grad-CAM on them:
for l in ['block_2_expand', 'block_9_expand', 'Conv_1']:
    print(l)
    display(eli5.show_prediction(model, doc, layer=l)) # we pass the layer as an argument
#The layer parameter accepts a layer instance, index, name, or None (get layer automatically) as its arguments. 
#This is where Grad-CAM builds its heatmap from.
block_2_expand
block_9_expand
Conv_1
In [21]:
expl = eli5.explain_prediction(model, doc)
In [22]:
print(expl)
Explanation(estimator='mobilenetv2_1.00_224', description='Grad-CAM visualization for image classification; \noutput is explanation object that contains input image \nand heatmap image for a target.\n', error='', method='Grad-CAM', is_regression=False, targets=[TargetExplanation(target=656, feature_weights=None, proba=None, score=0.37663382, weighted_spans=None, heatmap=array([[0.02510191, 0.0318539 , 0.00107501, 0.08992572, 0.07111334,
        0.05832556, 0.02134247],
       [0.        , 0.        , 0.        , 0.        , 0.11829348,
        0.09976096, 0.        ],
       [0.        , 0.        , 0.        , 0.02226777, 0.1253502 ,
        0.07257652, 0.        ],
       [0.09020076, 0.19600925, 0.34677944, 0.35221505, 0.38139262,
        0.09063622, 0.        ],
       [0.28765884, 0.74981187, 0.96166665, 0.99999992, 0.45927177,
        0.        , 0.        ],
       [0.46863595, 0.82144505, 0.71545779, 0.82378856, 0.34421496,
        0.25040678, 0.04249703],
       [0.18286155, 0.38966186, 0.52952322, 0.557312  , 0.24641148,
        0.        , 0.01127328]]))], feature_importances=None, decision_tree=None, highlight_spaces=None, transition_features=None, image=<PIL.Image.Image image mode=RGB size=224x224 at 0x1F2D9C77708>)
In [23]:
# we can access the various attributes of a target being explained
print((expl.targets[0].target, expl.targets[0].score, expl.targets[0].proba))
(656, 0.37663382, None)
In [24]:
# Pull the input image and the target's raw heatmap out of the Explanation.
image = expl.image
heatmap = expl.targets[0].heatmap

display(image) # the .image attribute is a PIL image
print(heatmap) # the .heatmap attribute is a numpy array
[[0.02510191 0.0318539  0.00107501 0.08992572 0.07111334 0.05832556
  0.02134247]
 [0.         0.         0.         0.         0.11829348 0.09976096
  0.        ]
 [0.         0.         0.         0.02226777 0.1253502  0.07257652
  0.        ]
 [0.09020076 0.19600925 0.34677944 0.35221505 0.38139262 0.09063622
  0.        ]
 [0.28765884 0.74981187 0.96166665 0.99999992 0.45927177 0.
  0.        ]
 [0.46863595 0.82144505 0.71545779 0.82378856 0.34421496 0.25040678
  0.04249703]
 [0.18286155 0.38966186 0.52952322 0.557312   0.24641148 0.
  0.01127328]]
In [25]:
#Visualizing the heatmap:
heatmap_im = eli5.formatters.image.heatmap_to_image(heatmap)
display(heatmap_im)
In [ ]:
#resize
heatmap_im = eli5.formatters.image.expand_heatmap(heatmap, image, resampling_filter=Image.BOX)
display(heatmap_im)
In [26]:
#overlay the heatmap over the original image
I = eli5.format_as_image(expl)
display(I)
In [27]:
# colorisation of the heatmap
# opacity

import matplotlib.cm

I = eli5.format_as_image(expl, alpha_limit=1.0, colormap=matplotlib.cm.cividis)
display(I)
In [28]:
# Swap the softmax output layer for a linear (no activation) layer so the
# explanation is computed on raw logits, then compare with/without softmax.
# first check the explanation *with* softmax
print('with softmax')
display(eli5.show_prediction(model, doc))


# remove softmax
l = model.get_layer(index=-1) # get the last (output) layer
l.activation = keras.activations.linear # swap activation

# save and load back the model as a trick to reload the graph
model.save('tmp_model_save_rmsoftmax') # note that this creates a file of the model
model = keras.models.load_model('tmp_model_save_rmsoftmax')

print('without softmax')
display(eli5.show_prediction(model, doc))
with softmax
without softmax
In [29]:
# NASNet comparison: explain the same image with a second architecture
from keras.applications import nasnet

model2 = nasnet.NASNetMobile(include_top=True, weights='imagenet', classes=1000)

# we reload the image array to apply nasnet-specific preprocessing
doc2 = keras.preprocessing.image.img_to_array(im)
doc2 = np.expand_dims(doc2, axis=0)
nasnet.preprocess_input(doc2)  # in-place preprocessing; return value unused

print(model.name)
# note that this model is without softmax
display(eli5.show_prediction(model, doc))
print(model2.name)
display(eli5.show_prediction(model2, doc2))
W0930 15:21:23.060173 11112 deprecation_wrapper.py:119] From C:\Users\Admin\Anaconda3\envs\env5\lib\site-packages\keras\backend\tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0930 15:21:23.459333 11112 deprecation_wrapper.py:119] From C:\Users\Admin\Anaconda3\envs\env5\lib\site-packages\keras\backend\tensorflow_backend.py:4074: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

mobilenetv2_1.00_224
NASNet
In [30]:
# VGG19 comparison: explain the same image with a third architecture
from keras.applications import vgg19

model3 = vgg19.VGG19(include_top=True, weights='imagenet', classes=1000)

# we reload the image array to apply vgg19-specific preprocessing
# (fixed copy-pasted comment that said "nasnet-specific")
doc3 = keras.preprocessing.image.img_to_array(im)
doc3 = np.expand_dims(doc3, axis=0)
vgg19.preprocess_input(doc3)

print(model.name)
# note that this model is without softmax
display(eli5.show_prediction(model, doc))
print(model3.name)
# Bug fix: the original passed model2 (NASNet) while printing model3's name
# and using VGG-preprocessed input; use the VGG19 model so heatmap and label
# match.
display(eli5.show_prediction(model3, doc3))
mobilenetv2_1.00_224
vgg19